/*******************************************************************************
Project:		Wealth -- Smith, Zidar, and Zwick
Last modified: 	2021-06-15
Modified by:	Dustin Swonder
Description:	This files builds inputs for a version of the interest rate CMD 
				exercise in which group 1 is the top 1% and group 2 is the 
				bottom 99%. It pulls data from SZ parameters, FRED, the SCF, and 
				collapses of tax data.
*******************************************************************************/

/*******************************************************************************
	(0) Prepare interest rates and CPI adjustments data from FRED
*******************************************************************************/

freduse AAA BAA DGS10 DGS5 CPIAUCSL, clear

gen year = yofd(daten)

collapse (mean) CPIAUCSL r_aaa = AAA r_baa = BAA r_ust10 = DGS10 r_ust5 = DGS5, by(year)

qui keep if inrange(year, 1966, 2019)

foreach rate of varlist r_* {
	gen ln_`rate' = ln(`rate')
}

qui sort year
assert year[_N] == 2019

qui gen cpi_adjfactor = CPIAUCSL[_N] / CPIAUCSL
drop CPIAUCSL

tempfile fred
save `fred'

/*******************************************************************************
	(1) Prepare counts of equal-split individuals by year
*******************************************************************************/

import excel using $litroot/SZ2020Revisionists/PSZ2020Programs/parameters.xlsx, ///
    firstrow clear

keep yr totadults20 ttinttaxw
qui keep if !missing(totadults20) & inrange(yr, 1966, 2019)

rename (yr totadults20) (year num_ind)

* Scale equal split individuals out of thousands
assert num_ind < 3E5 // Raw data are not the correct order of magnitude
replace num_ind = num_ind * 1E3

merge 1:1 year using `fred', keepusing(cpi_adjfactor) assert(3) nogen

qui replace ttinttaxw = ttinttaxw * cpi_adjfactor

* Fixed claims assets are in millions; scale into trillions
qui gen a_total_sz20 = ttinttaxw / 1E6
qui gen ln_a_total_sz20 = ln(a_total_sz20)

isid year
sort year

tempfile parameters
save `parameters'

/*******************************************************************************
	(2) Prepare SCF data
*******************************************************************************/

foreach rankvar in niw intinc agi nonintagi { // Cycle through ranking variables

	/***************************************************************************
		(2.1) Load data and drop redundant variables; rename for simplicity
	***************************************************************************/

	use $dtadir/scf_revision.dta, clear // Load microfile

	gen fixmutf = (0.5 * comutf) + gbmutf + obmutf + mmmf + (0.5 * omutf)

	* Keep only necessary variables
	keep year wgt networth_pref intinc agi fixmutf inttaxw married

	qui gen niw = networth_pref - inttaxw - fixmutf // Create non-interest wealth
	qui gen nonintagi = agi - intinc

	/***************************************************************************
		(2.2) Implement equal-split ranking
	***************************************************************************/

	ds year wgt `rankvar' married, not
	es_rank_scf, rankvar(`rankvar') othersplitvars("`r(varlist)'")

	qui gen `rankvar'_grp = cond(es_rank > 0.999, 1, 2) // Group into top 0.1/bot99.9

	/***************************************************************************
		(2.3) Collapse to yield sums of interest income and fixed claims by 	
			group, then clean up a bit
	***************************************************************************/

	foreach unweighted of varlist intinc inttaxw { // Weight variables to collapse
		qui replace `unweighted' = `unweighted' * wgt
	}

	collapse (sum) y = intinc a = inttaxw, by(year `rankvar'_grp)

	* Scale down into trillions
	qui replace y = y / 1E12
	qui replace a = a / 1E12 

	/***************************************************************************
		(2.4) Calculate interest rate
	***************************************************************************/

	qui gen r = (y / a) * 100 // Compute interest rate

	/***************************************************************************
		(2.5) Reshape data as wide and compute top group's share of fixed claims
	***************************************************************************/

	qui reshape wide y a r, i(year) j(`rankvar'_grp) // Reshape by group

	qui gen a_total_scf = a1 + a2
	qui gen sa1 = a1 / a_total // Compute top 1's share of fixed claims assets

	qui summ sa1 // Report top 1's average share of fixed claims assets over years
	qui egen mean_sa1 = mean(sa1) // Store avg in variables
	drop sa1

	/***************************************************************************
		(2.6) Merge on CPI adjustment factor and scale to 2019 dollars; also, 
			put variables of interest in logs
	***************************************************************************/

	qui merge 1:1 year using `fred', assert(2 3) keep(3) keepusing(cpi_adjfactor) nogen

	foreach dollarvar of varlist y1 y2 a1 a2 a_total_scf {
		qui replace `dollarvar' = `dollarvar' * cpi_adjfactor
	}
	drop cpi_adjfactor

	foreach logvar of varlist y1 y2 a1 a2 r1 r2 a_total_scf {
		gen ln_`logvar' = ln(`logvar')
	}

	/***************************************************************************
		(2.7) Clean up, sort, and save as tempfile
	***************************************************************************/

	isid year
	sort year

	tempfile scf`rankvar'
	qui save `scf`rankvar''
}

/*******************************************************************************
	(3) Prepare tax data
*******************************************************************************/

	/***************************************************************************
		(3.1) Rank by interest income (received from Drew 2021-02-04)
	***************************************************************************/

use $rawdir/inside_cmd_inputs_20210209/fixinc_total_ranked_by_fixinc.dta, clear

isid year
rename fixinc? y?

qui merge 1:1 year using `fred', keepusing(cpi_adjfactor) assert(2 3) keep(3) nogen

foreach dollarvar of varlist y1 y2 {
	qui replace `dollarvar' = `dollarvar' / 1E12 // Scale down to trillions
	qui replace `dollarvar' = `dollarvar' * cpi_adjfactor
	qui gen ln_`dollarvar' = ln(`dollarvar')
}
drop cpi_adjfactor

tempfile taxdata_fixincranks
save `taxdata_fixincranks'

	/***************************************************************************
		(3.2) Rank by non-interest wealth (this is hweal112 - taxbond_3tier 
			before 2001, hweal112 - taxbond_info afterwards)
	***************************************************************************/

forv year = 1966 / 2016 {

	local wlthdefn = cond(`year' < 1980, $preferred_defn_early + 100, ///
					 cond(`year' < 2001, $preferred_defn_mid + 100, $preferred_defn_late + 100))

	qui import delimited using $rawdir/inside_collapses/2021_05_20/wealth_scen`wlthdefn'_es_`year'.csv, clear

	keep group fiint intest taxbond
	qui gen year = `year'

	qui keep if inlist(group, "All", "P99.9-100")

	qui replace group = cond(group == "All", "_all", "_top01")

	qui gen fixinc = fiint + intest
	drop fiint intest

	qui reshape wide fixinc taxbond, i(year) j(group, string)

	qui gen fixinc_bot99pt9 = fixinc_all - fixinc_top01 

	qui gen r_bar = (fixinc_all / taxbond_all) * 100

	keep year fixinc_all fixinc_top01 fixinc_bot99pt9 taxbond_all r_bar

	rename (fixinc_all fixinc_top01 fixinc_bot99pt9 taxbond_all) (y_total y1 y2 a_total)

	qui merge 1:1 year using `fred', keepusing(cpi_adjfactor) assert(2 3) keep(3) nogen

	foreach dollarvar of varlist y1 y2 y_total a_total {
		qui replace `dollarvar' = `dollarvar' / 1E12 // Scale down into trillions
		qui replace `dollarvar' = `dollarvar' * cpi_adjfactor
	}
	drop cpi_adjfactor

	foreach logvar of varlist y1 y2 a_total r_bar {
		qui gen ln_`logvar' = ln(`logvar')
	}

	tempfile nonintwealth`year'
	qui save `nonintwealth`year''
}

use `nonintwealth1966', clear
forv year = 1967 / 2016 {
	append using `nonintwealth`year''
}

tempfile taxdata_niwranks
save `taxdata_niwranks'

	/***************************************************************************
		(3.3) Rank by AGI
	***************************************************************************/

use $rawdir/inside_cmd_inputs/fixinc_total_ranked_by_agi.dta, clear

gen consolidated_grp = cond(inlist(group, "P99.9-99.99", "P99.99-100"), 1, 2)

collapse (sum) y = fixinc, by(year consolidated_grp)

reshape wide y, i(year) j(consolidated_grp)

assert !missing(y1) & !missing(y2)

qui merge 1:1 year using `fred', keepusing(cpi_adjfactor) assert(2 3) keep(3) nogen

foreach dollarvar of varlist y1 y2 {
	qui replace `dollarvar' = `dollarvar' / 1E12 // Scale into trillions
	qui replace `dollarvar' = `dollarvar' * cpi_adjfactor
	qui gen ln_`dollarvar' = ln(`dollarvar')
}
drop cpi_adjfactor

tempfile taxdata_agiranks
save `taxdata_agiranks'

	/***************************************************************************
		(3.4) Rank by non-interest AGI (received from Drew 2021-02-04)
	***************************************************************************/

use $rawdir/inside_cmd_inputs_20210422/fixinc_total_ranked_by_agi_minus_fixinc.dta, clear

gen consolidated_grp = cond(group == "P99.9-100", 1, 2)

collapse (sum) y = fixinc, by(year consolidated_grp)

reshape wide y, i(year) j(consolidated_grp)

assert !missing(y1) & !missing(y2)

qui merge 1:1 year using `fred', keepusing(cpi_adjfactor) assert(2 3) keep(3) nogen

foreach dollarvar of varlist y1 y2 {
	qui replace `dollarvar' = `dollarvar' / 1E12 // Scale into trillions
	qui replace `dollarvar' = `dollarvar' * cpi_adjfactor
	qui gen ln_`dollarvar' = ln(`dollarvar')
}
drop cpi_adjfactor

sort year

tempfile taxdata_nonintagiranks
save `taxdata_nonintagiranks'

/*******************************************************************************
	(4) Merge data together
*******************************************************************************/

	/***************************************************************************
		(4.1) Merge on collapses from tax data (incl DINA for the moment) and 
			rename flow concepts to keep track of what's ranked by what
	***************************************************************************/

use `taxdata_fixincranks', clear
rename *y? *y?_fixincrank

merge 1:1 year using `taxdata_agiranks', assert(3) nogen
rename *y? *y?_fiincrank

merge 1:1 year using `taxdata_niwranks', assert(3) nogen
rename *y? *y?_niwrank

merge 1:1 year using `taxdata_nonintagiranks', assert(3) nogen
rename *y? *y?_nonintagirank

	/***************************************************************************
		(4.2) Merge on parameters aggregates and compute and r bar with updated
			SZ 2020 aggregate fixed claims concepts
	***************************************************************************/

merge 1:1 year using `parameters', keepusing(a_total_sz20 ln_a_total_sz20) assert(2 3)
assert year > 2016 if _merge == 2
assert _merge == 3 if year < 2017
drop _merge

qui gen r_bar_sz20 = (y_total / a_total_sz20) * 100
qui gen ln_r_bar_sz20 = ln(r_bar_sz20)
drop y_total

	/***************************************************************************
		(4.3) Merge on credit and interest rate risk measures from FRED
	***************************************************************************/

merge 1:1 year using `fred', keepusing(r_* ln_r_*) assert(3) nogen

	/***************************************************************************
		(4.4) Merge on SCF collapses, renaming concepts so that we can keep 
			track of different ranks
	***************************************************************************/

foreach rankvar in niw intinc agi nonintagi { // Cycle through ranking variables

	merge 1:1 year using `scf`rankvar'', assert(1 3)
	assert _merge == 1 if !inrange(year, 1989, 2019) | mod(year - 1989, 3) > 0
	assert _merge == 3 if inrange(year, 1989, 2019) & mod(year - 1989, 3) == 0
	drop _merge

 	#delimit ;
	rename (*y? *a1 *a2 *r?) 
		(*y?_scf_`rankvar'rank *a1_scf_`rankvar'rank *a2_scf_`rankvar'rank *r?_scf_`rankvar'rank);
	#delimit cr
}

	/***************************************************************************
		(4.5) Clean up, sort, and save
	***************************************************************************/

qui ds year, not
order `r(varlist)', alphabetic last

isid year
sort year
export delimited using $dtadir/cmdinputs_top01bot99pt9_20210615.csv, replace